home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
MacWorld 1998 March
/
Macworld (1998-03) (Disk 1).dmg
/
Shareware World
/
Info
/
For Developers
/
GhostScript 5.10
/
MacGS-510
/
files
/
pdf_base.ps
< prev
next >
Wrap
Text File
|
1997-09-25
|
15KB
|
457 lines
% Copyright (C) 1994, 1996, 1997 Aladdin Enterprises. All rights reserved.
%
% This file is part of Aladdin Ghostscript.
%
% Aladdin Ghostscript is distributed with NO WARRANTY OF ANY KIND. No author
% or distributor accepts any responsibility for the consequences of using it,
% or for whether it serves any particular purpose or works at all, unless he
% or she says so in writing. Refer to the Aladdin Ghostscript Free Public
% License (the "License") for full details.
%
% Every copy of Aladdin Ghostscript must include a copy of the License,
% normally in a plain ASCII text file named PUBLIC. The License grants you
% the right to copy, modify and redistribute Aladdin Ghostscript, but only
% under certain conditions described in the License. Among other things, the
% License requires that the copyright notice and this notice be preserved on
% all copies.
% pdf_base.ps
% Basic parser for PDF reader.
% This handles basic parsing of the file (including the trailer
% and cross-reference table), as well as objects, object references,
% and streams; it doesn't include any facilities for making marks on
% the page.
% Switch to language level 2, but only if the interpreter defines
% .setlanguagelevel.
/.setlanguagelevel where { pop 2 .setlanguagelevel } if
% Leave the current global flag on the operand stack (it is consumed by the
% .setglobal at the very end of this file) and set the flag to true.
.currentglobal true .setglobal
% Create pdfdict (capacity 100) unless it already exists, then make it the
% current dictionary for all the definitions below.
/pdfdict where { pop } { /pdfdict 100 dict def } ifelse
pdfdict begin
% We rebind #, #?, #dsc, and #dscfile later if we're writing out PostScript.
/# % <arg1> ... <argN> <opname> <N> # -
% Default definition: discard the count <N>, make <opname> executable,
% and execute it with <arg1> ... <argN> still on the stack.
{ pop cvx exec
} bind def
/#? % - #? <bool>
% Default definition: always pushes false.
{ false
} bind def
/#dsc % mark <obj1> ... #dsc -
% Default definition: discard everything down to and including the mark.
{ cleartomark
} bind def
/#dscfile % <filename> #dscfile -
% Default definition: discard the file name.
{ pop
} bind def
% Define the name interpretation dictionary for reading values.
% valueopdict maps the executable names that may appear while reading a
% PDF value to the action to take.  It is built as key/value pairs between
% a mark and .dicttomark, then made read-only.
/valueopdict mark
(<<) cvn { mark } bind % don't push an actual mark!
(>>) cvn /.dicttomark load	% >> closes a dictionary with .dicttomark
([) cvn { mark } bind % ditto
(]) cvn dup load		% ] maps to whatever ] is currently bound to
/true true
/false false
/null null
% F, R and stream map to executable names, so they are looked up in the
% dictionary stack at the time they are encountered.
/F dup cvx % see Objects section below
/R dup cvx % see Objects section below
/stream dup cvx % see Streams section below
.dicttomark readonly def
% ------ Utilities ------ %
% Define a scratch string. The PDF language definition says that
% no line in a PDF file can exceed 255 characters.
% 255-byte scratch buffer; prevline passes it to readline as the line buffer.
/pdfstring 255 string def
% Read the previous line of a file. If we aren't at a line boundary,
% read the line containing the current position.
% Skip any blank lines.
/prevline % - prevline <startpos> <substring>
% Backs PDFfile up by 257 bytes (clamped at 0) from the current position and
% reads forward line by line, remembering the last non-empty line and where
% it started, until the file position reaches or passes the initial one.
% <substring> is an interval of pdfstring.  PDFfile is left positioned
% after the last line read.
{ PDFfile fileposition dup () pdfstring
% Stack: initpos linepos(=initpos) line(=empty) string
2 index 257 sub 0 .max PDFfile exch setfileposition
{ % Stack: initpos linepos line string
PDFfile fileposition		% start position of the line about to be read
PDFfile 2 index readline pop	% read into pdfstring; readline's boolean is discarded
dup length 0 gt
{ 3 2 roll 5 -2 roll pop pop 2 index }	% non-empty: replace linepos/line with this line
{ pop }				% empty: keep the previous linepos/line
ifelse
% Stack: initpos linepos line string startpos
PDFfile fileposition 5 index ge { exit } if	% stop once we are at/past initpos
pop
}
loop pop pop 3 -1 roll pop	% drop startpos, string and initpos
} bind def
% Read a token from a file, recognizing the PDF 1.2 #nn escape convention.
% This should be done in C!
/.pdftoken % <file> .pdftoken <obj> -true-
% <file> .pdftoken -false-
% Wraps the token operator.  Executable names and all non-name objects are
% returned unchanged.  A literal name whose string form contains (#) is
% replaced by the name obtained by running name#escape on it.
{ token
{ dup type /nametype eq
{ dup xcheck
{ true			% executable name: return as is
}
{ dup .namestring (#) search
{ name#escape cvn exch pop }	% decode the escapes, drop the original name
{ pop }			% no #: drop the string, keep the name
ifelse true
}
ifelse
}
{ true			% not a name: return as is
}
ifelse
}
{ false			% token found nothing more to read
}
ifelse
} bind def
/name#escape % <post> <(#)> <pre> name#escape <string>
% Takes the three results of a successful (#) search.  Decodes the two hex
% digits at the start of <post> into one character, appends it to <pre>,
% then handles the rest of <post> the same way (recursively) and returns
% the concatenated string.  Signals syntaxerror (attributed to .pdftoken)
% if readhexstring cannot fill the one-character string.
{ exch pop			% drop the matched (#)
1 index 2 () /SubFileDecode filter dup (x) readhexstring
% Stack: post pre stream char t/f
not { /.pdftoken cvx /syntaxerror signalerror } if
exch closefile concatstrings	% Stack: post pre+char
exch 2 1 index length 2 sub getinterval	% post with its first 2 chars removed
(#) search { name#escape } if concatstrings
} bind def
% Execute a file, interpreting its executable names in a given
% dictionary. The name procedures may do whatever they want
% to the operand stack.
/.pdfrun % <file> <opdict> .pdfrun -
% Builds two procedures at run time and executes the outer one:
%   inner: <depth> <opdict> <file> followed by the body below, run by 'loop';
%   outer: { {inner loop} stopped /PDFsource <old PDFsource> store {stop} if }
% PDFsource is set to <file> for the duration of the loop and restored to
% its previous value afterwards; a 'stop' inside the loop is re-raised after
% the restore.
{ % Construct a procedure with the stack depth, file and opdict
% bound into it.
1 index cvlit count 2 sub 3 1 roll mark mark 5 2 roll
% Stack: file mark mark depth opdict file(literal)
{ % Stack: ..operands.. count opdict file
.pdftoken not { (%%EOF) cvn cvx } if	% at end of input, act as if %%EOF was read
dup xcheck
{ DEBUG { dup == flush } if
2 copy .knownget
{ exch pop exch pop exch pop exec }	% known operator: drop token/opdict/count, run it
{ BXlevel 0 le
{ (%stderr) (w) file
dup (****************Unknown operator: ) writestring
dup 2 index .writecvs dup (\n) writestring flushfile
}
if pop pop
count exch sub { pop } repeat % pop all the operands
}
ifelse
}
{ exch pop exch pop DEBUG { dup ==only ( ) print flush } if	% operand: leave it on the stack
}
ifelse
}
aload pop .packtomark cvx	% inner procedure, packed down to the nearer mark
/loop cvx 2 packedarray cvx
{ stopped /PDFsource } aload pop
PDFsource			% current value, captured so it can be restored
{ store { stop } if } aload pop .packtomark cvx	% outer procedure
/PDFsource 3 -1 roll store exec
} bind def
% ------ File reading ------ %
% Read the cross-reference entry for an (unresolved) object.
% The caller must save and restore the PDFfile position if desired.
% For invalid (free) objects, we return 0.
/readxrefentry % <object#> readxrefentry <objpos>
% Positions PDFfile at the value stored in Objects[object#], reads three
% tokens from there, records 1+generation (or 0 for an 'f' entry) in
% Generations[object#], and returns the first token.
% NOTE(review): for an 'f' entry the value returned is still the first
% token of the entry; only Generations is set to 0 -- confirm this is what
% "we return 0" above is meant to describe.
{ dup Objects exch lget
PDFfile exch setfileposition
PDFfile token pop % object position
PDFfile token pop % generation #
PDFfile token pop % n or f
dup /n eq
{ pop 1 add dup 255 gt		% in use: compute 1+gen#
{ Generations ltype /stringtype eq
{ % Convert Generations from a string to an array.
larray Generations llength lgrowto dup
0 1 2 index llength 1 sub
{ Generations 1 index lget lput dup	% copy element i into the new sequence
}
for pop /Generations exch store
}
if
}
if
}
{ /f eq
{ pop 0 }			% free: replace gen# by 0
{ /readxrefentry cvx /syntaxerror signalerror }
ifelse
}
ifelse
% Stack: obj# objpos 1+gen#
Generations 4 -1 roll 3 -1 roll lput
} bind def
% ================================ Objects ================================ %
% Since we may have more than 64K objects, we have to use a 2-D array to
% hold them (and the parallel Generations structure).
% Index split for the 2-D sequences: the low lshift bits of an index select
% the element within a sub-sequence, the remaining bits select the
% sub-sequence.
/lshift 9 def
/lnshift lshift neg def			% shift count for extracting the top-level index
/lsubmask 1 lshift bitshift 1 sub def	% mask for the index within a sub-sequence
/lsublen lsubmask 1 add def		% length of a full sub-sequence
/larray { % - larray <larray>
% A new sequence holding a single empty array as its only sub-array.
[ [] ]
} bind def
/lstring { % - lstring <lstring>
% A new sequence holding a single empty string as its only sub-string.
[ () ]
} bind def
/ltype { % <lseq> ltype <type>
% The type of the sequence is the type of its first sub-sequence.
0 get type
} bind def
/lget { % <lseq> <index> lget <value>
% Fetch element (index & lsubmask) of sub-sequence (index >> lshift).
dup //lsubmask and 3 1 roll //lnshift bitshift get exch get
} bind def
/lput { % <lseq> <index> <value> lput -
% Store <value> at element (index & lsubmask) of sub-sequence
% (index >> lshift).
3 1 roll
dup //lsubmask and 4 1 roll //lnshift bitshift get
% Stack: subindex value subseq
3 1 roll put
} bind def
/llength { % <lseq> llength <length>
% (number of sub-sequences - 1) << lshift, plus the length of the last
% sub-sequence.
dup length 1 sub dup //lshift bitshift
3 1 roll get length add
} bind def
% lgrowto assumes newlength > llength(lseq)
/growto { % <string/array> <length> growto <string'/array'>
% Allocate a new string or array (matching the type of the argument) of the
% given length, copy the old contents into its start, and return the new one.
1 index type /stringtype eq { string } { array } ifelse
2 copy copy pop exch pop
} bind def
/lgrowto { % <lseq> <newlength> lgrowto <lseq'>
% Grow a 2-D sequence to <newlength> elements.  If more sub-sequences are
% needed, a new top-level array is built; otherwise the last sub-sequence of
% the existing top-level array is replaced in place.
dup //lsubmask add //lnshift bitshift dup 3 index length gt {
% Add more sub-arrays. Start by completing the last existing one.
% Stack: lseq newlen newtoplen
3 -1 roll dup llength 1 sub //lsubmask or 1 add lgrowto
% Stack: newlen newtoplen lseq
[ exch aload pop
counttomark 2 add -1 roll % newtoplen
% Append full-length sub-sequences (grown from an empty interval of the
% preceding one, so they get the same type), then one empty sub-sequence
% that the code below expands to its final length.
counttomark sub { dup 0 0 getinterval lsublen growto } repeat
dup 0 0 getinterval ] exch
} {
pop
} ifelse
% Expand the last sub-array.
1 sub //lsubmask and 1 add	% required length of the last sub-sequence
exch dup dup length 1 sub 2 copy
% Stack: newsublen lseq lseq len-1 lseq len-1
get 5 -1 roll growto put
} bind def
% We represent an unresolved object reference by a procedure of the form
% {obj# gen# resolveR}. This is not a possible PDF object, because PDF has
% no way to represent procedures. Since PDF in fact has no way to represent
% any PostScript object that doesn't evaluate to itself, we can 'force'
% a possibly indirect object painlessly with 'exec'.
% Note that since we represent streams by executable dictionaries
% (see below), we need both an xcheck and a type check to determine
% whether an object has been resolved.
/unresolved?		% <object#> unresolved? <bool>
 { % True only when Objects[object#] is both an integer and executable.
   Objects exch lget
   dup type /integertype eq
   exch xcheck
   and
 } bind def
% oforce is simply the current binding of exec.
/oforce /exec load def
/oget		% <array> <index> oget <object>
		% <dict> <key> oget <object>
 { % Fetch the element; if it is executable, execute it and store the
   % result back in place of the original element.
   2 copy get
   dup xcheck { exec 3 copy put } if
   % Stack: container key value
   3 1 roll pop pop
 } bind def
% A null value in a dictionary is equivalent to an omitted key;
% we must check for this specially.
/knownoget		% <dict> <key> knownoget <value> true
			% <dict> <key> knownoget false
 { 2 copy known not
    { % Key absent.
      pop pop false
    }
    { % Key present: a null value counts as absent.
      oget dup null ne { true } { pop false } ifelse
    }
   ifelse
 } bind def
% PDF 1.1 defines a 'foreign file reference', but not its meaning.
% Per the specification, we convert these to nulls.
/F		% <file#> <object#> <generation#> F <object>
 { % With at least three items on the stack, treat this as a foreign file
   % reference and replace the three operands by null.  Otherwise run f,
   % since some PDF 1.1 files use F as a synonym for f.
   count 3 ge
    { pop pop pop null }
    { f }
   ifelse
 } bind def
% We keep track of objects in a pair of arrays, Objects and Generations.
% Generations[N] is 1+ the current generation number for object number N.
% (As far as we can tell, this is needed only for error checking.)
% If object N is loaded, Objects[N] is the actual object;
% otherwise, Objects[N] is an executable integer giving the file offset
% of the object's entry in the cross-reference table.
% For free objects, Generations[N] is 0.
/checkgeneration	% <object#> <generation#> checkgeneration <object#> <OK>
 { % Generations[object#] holds 1 + the generation number.
   Generations 2 index lget 1 sub 1 index ne
    { % Mismatch: report the object and generation numbers.
      (Warning: wrong generation: ) print 1 index =only ( ) print = false
    }
    { pop true
    }
   ifelse
 } bind def
/R		% <object#> <generation#> R <object>
 { 1 index unresolved? not
    { % Already loaded: return the stored object, or null if the
      % generation number does not match.
      checkgeneration { Objects exch lget } { pop null } ifelse
    }
    { % Not loaded yet: return the executable packed array
      % {obj# gen# resolveR}.
      /resolveR cvx 3 packedarray cvx
    }
   ifelse
 } bind def
% If we encounter an object definition while reading sequentially,
% we just store it away and keep going.
% objopdict = every entry of valueopdict, plus endobj as an executable name.
/objopdict mark
valueopdict { } forall		% push all key/value pairs of valueopdict
/endobj dup cvx
.dicttomark readonly def
/obj % <object#> <generation#> obj <object>
% Run the token interpreter on PDFfile with objopdict; <object#> and
% <generation#> stay on the stack underneath for endobj to use.
{ PDFfile objopdict .pdfrun
} bind def
/endobj		% <object#> <generation#> <object> endobj <object>
 { % Stores <object> in Objects[object#] when the generation number
   % matches; otherwise discards it and returns null.
   3 1 roll
   % Stack: object obj# gen#
		% Read the xref entry if we haven't yet done so.
		% This is only needed for generation # checking.
   1 index unresolved?
    { PDFfile fileposition
      2 index readxrefentry pop
		% Restore the position saved above.  It came from
		% fileposition, so it is already absolute and must be
		% restored as-is (resolveR restores its saved position the
		% same way); adding PDFoffset here would misposition the
		% file whenever PDFoffset is non-zero.
      PDFfile exch setfileposition
    } if
   checkgeneration { Objects exch 2 index lput } { pop pop null } ifelse
 } bind def
% When resolving an object reference, we stop at the endobj.
% resolveopdict = every entry of valueopdict, plus an endobj entry that runs
% endobj and then exits the enclosing loop.
/resolveopdict mark
valueopdict { } forall		% push all key/value pairs of valueopdict
/endobj { endobj exit } bind
.dicttomark readonly def
/resolveR % <object#> <generation#> resolveR <object>
% If the object is still unresolved: save the PDFfile position, read the
% xref entry, check the generation, seek to the object (entry position +
% PDFoffset), verify the "<obj#> <gen#> obj" header, run pdf_run_resolve,
% and restore the saved position.  On a generation mismatch the object is
% recorded as null.  If the object is already resolved, return Objects[obj#].
{ DEBUG { (%Resolving: ) print 2 copy 2 array astore == } if
1 index unresolved?
{ PDFfile fileposition 3 1 roll	% Stack: savepos obj# gen#
1 index readxrefentry
3 1 roll checkgeneration
{ % Stack: savepos objpos obj#
exch PDFoffset add PDFfile exch setfileposition
PDFfile token pop 2 copy ne	% first token must equal obj#
{ (xref error!\n) print /resolveR cvx /rangecheck signalerror
}
if pop PDFfile token pop	% second token stays on the stack as gen#
PDFfile token pop /obj ne	% third token must be the name obj
{ (xref error!\n) print /resolveR cvx /rangecheck signalerror
}
if
pdf_run_resolve % PDFfile resolveopdict .pdfrun
}
{ Objects exch null lput pop null	% wrong generation: store and return null
}
ifelse exch PDFfile exch setfileposition	% restore the saved position
}
{ pop Objects exch lget
}
ifelse
} bind def
%================================ Streams ================================ %
% We represent a stream by an executable dictionary that contains,
% in addition to the contents of the original stream dictionary:
% /File - the file or string where the stream contents are stored;
% /FilePosition - iff File is a file, the position in the file
% where the contents start.
% /StreamKey - the key used to decrypt this stream if any
% We do the real work of constructing the data stream only when the
% contents are needed.
% Construct a stream. The length is not reliable in the face of
% different end-of-line conventions, but it's all we've got.
%
% PDF files are inconsistent about what may fall between the 'stream' keyword
% and the actual stream data, and it appears that no one algorithm can
% detect this reliably. We used to try to guess whether the file included
% extraneous \r and/or \n characters, but we no longer attempt to do so,
% especially since the PDF 1.2 specification states flatly that the only
% legal terminators following the 'stream' keyword are \n or \r\n, both of
% which are properly skipped and discarded by the token operator.
/stream % <dict> stream <streamdict>
% Records where the stream data lives in the dictionary, skips over the
% data, checks that the next token is endstream, and returns the dictionary
% made executable.
{ PDFsource PDFfile eq
{ dup /File PDFfile put
dup /FilePosition PDFfile fileposition put
DEBUG { (%FilePosition: ) print dup /FilePosition get == } if
% Skip the data by advancing the file position by /Length bytes.
PDFfile fileposition 1 index /Length oget add
PDFfile exch setfileposition
}
{ % We're already reading from a stream, which we can't reposition.
% Capture the sub-stream contents in a string.
dup /Length oget string PDFsource exch readstring
not
{ (Unexpected EOF in stream!\n) print
/stream cvx /rangecheck signalerror
}
if
1 index exch /File exch put	% here /File is the captured string
}
ifelse
PDFsource token pop
/endstream ne { /stream cvx /syntaxerror signalerror } if
cvx
} bind def
% Resolve a stream dictionary to a PostScript stream.
% Streams with no filters require special handling:
% - If we are going to interpret their contents, we let endstream
% terminate the interpretation loop;
% - If we are just going to read data from them, we impose
% a SubFileDecode filter that reads just the requisite amount of data.
% Note that, in general, resolving a stream repositions PDFfile.
% Clients must save and restore the position of PDFfile themselves.
/resolvestream	% <streamdict> <readdata?> resolvestream <stream>
 { % Positions the underlying file (when the dictionary has a
   % /FilePosition), lets pdf_decrypt_stream add decryption, and then
   % either applies the filters named by /Filter in order (each with its
   % /DecodeParms entry, if any) or, when there are no filters, limits the
   % data with a SubFileDecode filter as described above.
   exch dup /FilePosition .knownget
    { 1 index /File get exch setfileposition }
   if
		% Stack: readdata? dict
   dup /DecodeParms .knownget not { null } if
   1 index /Filter .knownget not { {} } if
		% A single filter name (and its parameters, if present) is
		% wrapped in a one-element array so both cases are handled
		% uniformly below.
   dup type /nametype eq
    { 1 array astore
      1 index null ne { exch 1 array astore exch } if
    }
   if
		% Stack: readdata? dict parms filternames
   2 index /File get exch
		% Stack: readdata? dict parms file/string filternames
   pdf_decrypt_stream		% add decryption if needed
   dup length 0 eq
    {		% All the PDF filters have EOD markers, but in this case
		% there is no specified filter.
      pop exch pop
		% Stack: readdata? dict file/string
      2 index
       {	% We're going to read data; use a SubFileDecode filter.
	 1 index /Length oget () /SubFileDecode filter
       }
       { dup type /filetype ne
	  {	% Use a SubFileDecode filter to read from a string.
		% The filter name must be a literal name, as in the
		% other SubFileDecode uses in this file; as an
		% executable name it would be looked up and executed
		% instead of being passed to filter.
	    0 () /SubFileDecode filter
	  }
	 if
       }
      ifelse
    }
    { 2 index null eq
       {	% No parameters at all: apply each filter directly.
	 { filter }
       }
       {	% Stack: readdata? dict parms file/string filtername
		% Use the first remaining parameter entry (unless it is
		% null), then drop it from the parameter list.
	 { 2 index 0 get dup null eq { pop } { exch } ifelse filter
	   exch dup length 1 sub 1 exch getinterval exch
	 }
       }
      ifelse forall exch pop
    }
   ifelse
		% Stack: readdata? dict file
   exch pop exch pop
 } bind def
% Executing endstream simply exits the innermost enclosing loop (see the
% note above about letting endstream terminate the interpretation loop).
/endstream { exit } def
end % pdfdict
% Restore the global flag that .currentglobal left on the operand stack at
% the top of this file.
.setglobal